# !conda install cf_xarray -yExploring PACE Surface Reflectance Data
1. PACE OCI L2 Surface Reflectance
Summary
Learning Objectives
Contents
- Read PACE L2 Surface Reflectance data
- Mask clouds and water from images
- Display spectral signatures for selected locations
- Calculate unique spectral indices
1. Setup
# Import required libraries
import cartopy
import cf_xarray
import earthaccess
import numpy as np
import rasterio as rio
import hvplot.xarray
import matplotlib.pyplot as plt
import xarray as xr
import pyproj
import holoviews as hv
from io import BytesIOauth = earthaccess.login(persist=True)tspan = ("2025-03-01", "2025-03-20")
bbox = (-86.512, 31.70, -86.512, 34.938)
results = earthaccess.search_data(
short_name="PACE_OCI_L2_SFREFL",
temporal=tspan,
bounding_box=bbox,
)links = [result.data_links()[0] for result in results]
fs = earthaccess.get_fsspec_https_session()To open the PACE L2 Surface Reflectance data, we will use a combination of fsspec and BytesIO to read the data directly into memory. These files are roughly 750 mb each. The fsspec https session authenticates via NASA Earthdata credentials, allowing access to the data and BytesIO allows us to read the data into memory without saving it to disk.
with fs.open(links[0]) as f:
file_content = BytesIO(f.read())These files are heirarchichal netCDF files, so we will use the datatree function to read in and view all of the groups. Using the datatree representation, we can browse through the groups and variables in the file, which is useful for understanding the structure of the data, and for selecting what we want to work with.
datatree = xr.open_datatree(file_content, decode_timedelta=False)
datatree<xarray.DatasetView> Size: 0B
Dimensions: ()
Data variables:
*empty*
Attributes: (12/45)
title: OCI Level-2 Data SFREFL
product_name: PACE_OCI.20250301T183734.L2.SFREFL.V3_...
processing_version: 3.0
history: l2gen par=/data11/sdpsoper/vdc/vpu10/w...
instrument: OCI
platform: PACE
... ...
geospatial_lon_max: -74.50793
geospatial_lon_min: -105.216805
startDirection: Ascending
endDirection: Ascending
day_night_flag: Day
earth_sun_distance_correction: 1.0183274745941162TODO Explanation of data structure and selection of these variables
ds = xr.merge(
(
datatree.ds,
datatree["geophysical_data"].ds[["rhos", "l2_flags"]],
datatree["sensor_band_parameters"].coords,
datatree["navigation_data"].ds.set_coords(("longitude", "latitude")).coords,
)
)
ds<xarray.Dataset> Size: 1GB
Dimensions: (number_of_lines: 1709, pixels_per_line: 1272,
wavelength_3d: 122)
Coordinates:
* wavelength_3d (wavelength_3d) float64 976B 346.0 351.0 ... 2.258e+03
longitude (number_of_lines, pixels_per_line) float32 9MB ...
latitude (number_of_lines, pixels_per_line) float32 9MB ...
Dimensions without coordinates: number_of_lines, pixels_per_line
Data variables:
rhos (number_of_lines, pixels_per_line, wavelength_3d) float32 1GB ...
l2_flags (number_of_lines, pixels_per_line) int32 9MB ...
Attributes: (12/45)
title: OCI Level-2 Data SFREFL
product_name: PACE_OCI.20250301T183734.L2.SFREFL.V3_...
processing_version: 3.0
history: l2gen par=/data11/sdpsoper/vdc/vpu10/w...
instrument: OCI
platform: PACE
... ...
geospatial_lon_max: -74.50793
geospatial_lon_min: -105.216805
startDirection: Ascending
endDirection: Ascending
day_night_flag: Day
earth_sun_distance_correction: 1.0183274745941162# def tap_bounds(to_tap, res, direction):
# """Tap bounds to regular grid."""
# mult = 1 if to_tap >= 0 else -1
# abs_to = abs(to_tap)
# abs_res = abs(res)
# mod_val = abs_to % abs_res
# if mod_val == 0:
# adj = 0
# elif direction == "up" and mult == 1:
# adj = abs_res - mod_val
# elif direction == "down" and mult == 1:
# adj = -mod_val
# elif direction == "up" and mult == -1:
# adj = -mod_val
# elif direction == "down" and mult == -1:
# adj = abs_res - mod_val
# else:
# raise ValueError("direction must be 'up' or 'down'")
# return mult * (abs_to + adj)
# def build_pace_glt(ds, target_resolution=(0.01,-0.01), radius_of_influence=5000):
# """
# Build GLT array from xarray Dataset of PACE navigation data.
# Parameters:
# - ds: xarray.Dataset with 'latitude' and 'longitude' variables.
# - target_resolution: tuple (res_x, res_y) in degrees; res_y can be negative.
# - target_extent_ul_lr: optional tuple (min_x, max_y, max_x, min_y); else computed from data.
# - radius_of_influence: float, max distance in meters for nearest neighbor.
# Returns:
# - np.ndarray of shape (2, y_size, x_size): [0,:,:] = column indices (1-based), [1,:,:] = row indices (1-based); -9999 for invalid.
# """
# lats = ds['latitude'].values
# lons = ds['longitude'].values
# lines, pixels = lons.shape
# min_x = np.nanmin(lons)
# max_y = np.nanmax(lats)
# max_x = np.nanmax(lons)
# min_y = np.nanmin(lats)
# res_x, res_y = target_resolution
# # Tap bounds for alignment
# min_x = tap_bounds(min_x, res_x, "down")
# max_y = tap_bounds(max_y, res_y, "up")
# max_x = tap_bounds(max_x, res_x, "up")
# min_y = tap_bounds(min_y, res_y, "down")
# x_size = int(np.ceil((max_x - min_x) / abs(res_x)))
# y_size = int(np.ceil((max_y - min_y) / abs(res_y)))
# # Define swath and area
# swath_def = pyresample.geometry.SwathDefinition(lons=lons, lats=lats)
# area_extent = (min_x, min_y, max_x, max_y)
# area_def = pyresample.geometry.AreaDefinition(
# 'glt_grid', 'GLT grid', 'latlong',
# {'proj': 'latlong', 'ellps': 'WGS84'},
# x_size, y_size, area_extent
# )
# # Get neighbour info using pyresample's KDTree (geospatial-aware)
# valid_input, valid_output, index_array, distance_array = pyresample.kd_tree.get_neighbour_info(
# swath_def, area_def, radius_of_influence=5000, neighbours=1
# )
# # Set invalid to -1
# index_array = index_array.flatten().astype(np.int64) # Ensure flat
# index_array[~valid_output.flatten()] = -1
# # Unravel to col (pixel), row (line), 1-based
# glt_col = np.full(index_array.shape, -9999, dtype=int)
# glt_row = np.full(index_array.shape, -9999, dtype=int)
# valid_map = index_array != -1
# glt_row[valid_map] = (index_array[valid_map] // pixels) + 1
# glt_col[valid_map] = (index_array[valid_map] % pixels) + 1
# # Reshape and stack
# glt = np.stack((glt_col.reshape(y_size, x_size), glt_row.reshape(y_size, x_size)))
# return glt#Check the wavelengths available for PACE
ds["wavelength_3d"]<xarray.DataArray 'wavelength_3d' (wavelength_3d: 122)> Size: 976B
array([ 346., 351., 356., 361., 366., 371., 375., 380., 385., 390.,
395., 400., 405., 410., 415., 420., 425., 430., 435., 440.,
445., 450., 455., 460., 465., 470., 475., 480., 485., 490.,
495., 500., 505., 510., 515., 520., 525., 530., 535., 540.,
545., 550., 555., 560., 565., 570., 575., 580., 586., 615.,
620., 625., 630., 635., 640., 642., 645., 647., 650., 652.,
655., 657., 660., 662., 665., 667., 670., 672., 675., 677.,
679., 682., 697., 699., 702., 704., 707., 709., 712., 714.,
719., 724., 729., 734., 739., 742., 744., 747., 749., 752.,
754., 772., 774., 779., 784., 789., 794., 799., 804., 809.,
814., 819., 824., 829., 835., 840., 845., 850., 855., 860.,
865., 870., 875., 880., 885., 890., 895., 1038., 1249., 1618.,
2131., 2258.])
Coordinates:
* wavelength_3d (wavelength_3d) float64 976B 346.0 351.0 ... 2.258e+03
Attributes:
long_name: wavelengths
units: nm
valid_min: 0
valid_max: 20000rhos_860 = ds["rhos"].sel({"wavelength_3d": 860}, method="nearest")
fig, ax = plt.subplots(figsize=(9, 5), subplot_kw={"projection": cartopy.crs.PlateCarree()})
ax.gridlines(draw_labels={"left": "y", "bottom": "x"}, linewidth=0.25)
ax.coastlines(linewidth=0.5)
ax.add_feature(cartopy.feature.OCEAN, edgecolor="w", linewidth=0.01)
ax.add_feature(cartopy.feature.LAND, edgecolor="w", linewidth=0.01)
rhos_860.plot(x="longitude", y="latitude", cmap="Greys_r", vmin=0, vmax=1.0)
plt.show()
rhos_860<xarray.DataArray 'rhos' (number_of_lines: 1709, pixels_per_line: 1272)> Size: 9MB
[2173848 values with dtype=float32]
Coordinates:
wavelength_3d float64 8B 860.0
longitude (number_of_lines, pixels_per_line) float32 9MB ...
latitude (number_of_lines, pixels_per_line) float32 9MB ...
Dimensions without coordinates: number_of_lines, pixels_per_line
Attributes:
long_name: Surface reflectance
valid_min: -0.05
valid_max: 1.0#Now flags
ds["l2_flags"]<xarray.DataArray 'l2_flags' (number_of_lines: 1709, pixels_per_line: 1272)> Size: 9MB
[2173848 values with dtype=int32]
Coordinates:
longitude (number_of_lines, pixels_per_line) float32 9MB ...
latitude (number_of_lines, pixels_per_line) float32 9MB ...
Dimensions without coordinates: number_of_lines, pixels_per_line
Attributes:
long_name: Level-2 Processing Flags
valid_min: -2147483648
valid_max: 2147483647
flag_masks: [ 1 2 4 8 ...
flag_meanings: ATMFAIL LAND PRODWARN HIGLINT HILT HISATZEN COASTZ SPARE ...Use the cf_xarray library to create a mask including only land and excluding clouds and ice.
# Create Mask
ds["l2_flags"].cf.is_flag_variable
cldwater_mask = (
(ds["l2_flags"].cf == "LAND")
& ~(ds["l2_flags"].cf == "CLDICE")
)
# Apply mask, creating new dataset
rhos = ds["rhos"].where(cldwater_mask)# Plot a band of masked data
rhos_860 = rhos.sel({"wavelength_3d": 860}, method="nearest")
fig, ax = plt.subplots(figsize=(9, 5), subplot_kw={"projection": cartopy.crs.PlateCarree()})
ax.coastlines(linewidth=0.25)
ax.gridlines(draw_labels={"left": "y", "bottom": "x"}, linewidth=0.25)
ax.add_feature(cartopy.feature.OCEAN, edgecolor="w", linewidth=0.01)
ax.add_feature(cartopy.feature.LAND, edgecolor="w", linewidth=0.01)
ax.add_feature(cartopy.feature.LAKES, edgecolor="k", linewidth=0.1)
rhos_860.plot(x="longitude", y="latitude", cmap="Greys_r", vmin=0, vmax=1.0)
plt.show()
To more easily work with the data we can project and resample it onto a regular grid.
To do this with rioxarray, we need to change the order of the dimensions in the array. Hyperspectral data is often stored (y,x,band) since most processing; however, rioxarray expects the dimensions to be in the order of (band, y, x), so we must transpose the array to grid the data using rioxarray.
# Transpose data
sr_src = rhos.transpose("wavelength_3d", ...)
sr_src<xarray.DataArray 'rhos' (wavelength_3d: 122, number_of_lines: 1709,
pixels_per_line: 1272)> Size: 1GB
array([[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
...
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]]], dtype=float32)
Coordinates:
* wavelength_3d (wavelength_3d) float64 976B 346.0 351.0 ... 2.258e+03
longitude (number_of_lines, pixels_per_line) float32 9MB -99.13 ... ...
latitude (number_of_lines, pixels_per_line) float32 9MB 13.6 ... 36.6
Dimensions without coordinates: number_of_lines, pixels_per_line
Attributes:
long_name: Surface reflectance
valid_min: -0.05
valid_max: 1.0# # Add coordinates for longitude and latitude
# sr_src.coords["longitude"] = ds["longitude"]
# sr_src.coords["latitude"] = ds["latitude"]
sr_src = sr_src.rio.set_spatial_dims("pixels_per_line", "number_of_lines").rio.write_crs("epsg:4326")
sr_src<xarray.DataArray 'rhos' (wavelength_3d: 122, number_of_lines: 1709,
pixels_per_line: 1272)> Size: 1GB
array([[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
...
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]]], dtype=float32)
Coordinates:
* wavelength_3d (wavelength_3d) float64 976B 346.0 351.0 ... 2.258e+03
longitude (number_of_lines, pixels_per_line) float32 9MB -99.13 ... ...
latitude (number_of_lines, pixels_per_line) float32 9MB 13.6 ... 36.6
spatial_ref int64 8B 0
Dimensions without coordinates: number_of_lines, pixels_per_line
Attributes:
long_name: Surface reflectance
valid_min: -0.05
valid_max: 1.0#If too many pixels too much memory could be used...........................
sr_dst = sr_src.rio.reproject(
dst_crs=sr_src.rio.crs,
src_geoloc_array=(
sr_src.coords["longitude"],
sr_src.coords["latitude"],
),
nodata=np.nan
).rename({'y': 'latitude', 'x': 'longitude'})sr_dst<xarray.DataArray 'rhos' (wavelength_3d: 122, latitude: 1524, longitude: 2037)> Size: 2GB
array([[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
...
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]],
[[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]]], dtype=float32)
Coordinates:
* longitude (longitude) float64 16kB -105.2 -105.2 ... -74.46 -74.44
* latitude (latitude) float64 12kB 36.62 36.6 36.59 ... 13.63 13.62 13.6
* wavelength_3d (wavelength_3d) float64 976B 346.0 351.0 ... 2.258e+03
spatial_ref int64 8B 0
Attributes:
long_name: Surface reflectance
valid_min: -0.05
valid_max: 1.0
_FillValue: nandef create_pace_rgb(rhos_ds, red=610, green=555, blue=465, pct_low=0.02, pct_high=0.98, gamma=0.5):
ds_rgb = sr_dst.sel(wavelength_3d=[red,green, blue], method='nearest')
# Create NaN Mask
nan_mask = np.isnan(ds_rgb.data)
# Stretch and Clip
for i in range(ds_rgb.data.shape[0]):
channel = ds_rgb.data[i]
valid = channel[~nan_mask[i]]
if len(valid) > 0:
p_low = np.percentile(valid, pct_low)
p_high = np.percentile(valid, pct_high)
channel = (channel - p_low) / (p_high - p_low)
np.clip(channel, 0, 1, out=channel)
# Apply Gamma and Clip
ds_rgb.data = np.power(ds_rgb.data, 0.5)
np.clip(ds_rgb.data, 0, 1, out=ds_rgb.data)
return ds_rgb# # Plot RGB Image - Still needs some brightening
# red_approx = 680.0
# green_approx = 540.0
# blue_approx = 450.0rgb_da = create_pace_rgb(sr_dst, pct_low=0.01, pct_high=0.99, gamma=0.3)
rgb = rgb_da.hvplot.rgb(x='longitude', y='latitude', bands='wavelength_3d', aspect = 'equal', frame_height=400, geo=True, crs='EPSG:4326', title="RGB PACE Image with Pre-selected bands")
rgbC:\Users\ebolch\AppData\Local\Temp\1\ipykernel_14144\2150097913.py:20: RuntimeWarning: invalid value encountered in power
ds_rgb.data = np.power(ds_rgb.data, 0.5)